## packages
suppressPackageStartupMessages(require(tidyverse))
suppressPackageStartupMessages(require(janitor))
suppressPackageStartupMessages(require(chipPCR))
## working directory
workdir <- "C://Users/u4667515/Dropbox/Collab_Projects/Covid19"
setwd(workdir)
## arrange sample information
### RSB LC480
rsb_data_path <- file.path(workdir, "RawData_v2_standardised/")
rsb_files_rn <- dir(file.path(workdir, "RawData_v2_standardised/"), pattern = "raw-fluorescence")
rsb_files_ct <- dir(file.path(workdir, "RawData_v2_standardised/"), pattern = "Ct-values")
rsb_files_keyfile <- dir(file.path(workdir, "RawData_v2_standardised/"), pattern = "sample-sheet")
## JCSMR TaqMan
jcsmr_dir <- file.path(workdir, "RawData_JCSMR_v1/")
jcsmr_data_paths <- file.path(jcsmr_dir, dir(jcsmr_dir))
jcsmr_files_rn <- dir(jcsmr_data_paths, pattern = "Amplification-Data")
jcsmr_files_ct <- dir(jcsmr_data_paths, pattern = "Ct-values")
jcsmr_files_keyfile <- dir(jcsmr_data_paths, pattern = "sample-sheet")
### arrange sample information, fluorescence & ct values
keyfile_rsb <- tibble(filename = rsb_files_keyfile) %>%
mutate( file_contents = map(filename,~ read_delim(file.path(rsb_data_path, .), delim = '\t'))) %>%
unnest %>% clean_names %>%
mutate(filename = paste(sapply(strsplit(filename, "_sample"), function(l) l[1]), well, sep='_')) %>% select(-well)
ct_rsb <- tibble(filename = rsb_files_ct) %>%
mutate( file_contents = map(filename,~ read_delim(file.path(rsb_data_path, .), delim = '\t'))) %>%
unnest %>% clean_names %>%
mutate(probe = ifelse(sapply(strsplit(filename, "_"), function(l) l[4]) == "SYBR", yes = "SYBR", no=probe )) %>%
mutate(filename = paste(sapply(strsplit(filename, "_Ct"), function(l) l[1]), well, sep='_')) %>%
select(-well) %>%
mutate(plate = sapply(strsplit(filename, "_"), function(l) l[3]))
data_rsb <- tibble(filename = rsb_files_rn) %>%
mutate( file_contents = map(filename,~ read_delim(file.path(rsb_data_path, .), delim = '\t'))) %>%
unnest %>% clean_names %>%
mutate(filename = paste(sapply(strsplit(filename, "_raw"), function(l) l[1]), well, sep='_')) %>%
left_join(keyfile_rsb, by="filename") %>%
na.omit %>% ## only clear empty wells
left_join(ct_rsb, by="filename") %>%
mutate(plate = sapply(strsplit(filename, "_"), function(l) l[3])) %>%
filter( ct_value != 0 ) ## only wells with correct product
keyfile_jcsmr <- tibble(filename = jcsmr_files_keyfile) %>%
mutate( file_contents = map(filename,~ read_delim(file.path(jcsmr_data_paths, .), delim = ' ', col_names = F))) %>%
unnest %>% clean_names %>% rename(well=x1, sample_name=x2, reaction_volume=x3, well_number=x4, dilution=x5) %>%
mutate(filename = paste(sapply(strsplit(filename, "_sample"), function(l) l[1]), well_number, sep='_')) %>%
select(-well, -well_number)
ct_jcsmr <- tibble(filename = jcsmr_files_ct) %>%
mutate( file_contents = map(filename,~ read_csv(file.path(jcsmr_data_paths, .), skip = 36))) %>%
unnest %>% clean_names %>%
select(filename, well, sample_name, target_name, reporter, ct) %>%
rename(probe=reporter) %>%
mutate(type = sapply(strsplit(filename,"_Ct-values-"), function(l) l[2])) %>%
mutate(filename = paste(sapply(strsplit(filename,"_Ct-values-"), function(l) l[1]), well, sep='_')) %>%
mutate(type = sapply(strsplit(type, ".csv"), function(l) l[1])) %>%
mutate(ct = as.numeric(paste(ifelse(ct == "Undetermined", yes = 0, no = ct)))) %>%
spread(type, ct) %>%
mutate(plate_name = paste(sapply(strsplit(filename, "_DS"), function(l) l[1]), "DS", sep='_'))
data_jcsmr <- tibble(filename = jcsmr_files_rn) %>%
mutate( file_contents = map(filename,~ read_csv(file.path(jcsmr_data_paths, .), skip = 36,
col_types = list(col_number(), col_number(), col_character(), col_number(), col_number())))) %>%
unnest %>% clean_names %>% rename(fluorescence=rn) %>% select(-delta_rn) %>% na.omit %>%
mutate(plate_name = sapply(strsplit(filename, "_Amp"), function(l) l[1])) %>%
mutate(filename = paste(sapply(strsplit(filename, "_Amp"), function(l) l[1]), well, sep='_')) %>%
left_join(keyfile_jcsmr, by="filename") %>%
mutate(sample_name = ifelse(is.na(sample_name) == T,
yes = ct_jcsmr$sample_name[match(filename, ct_jcsmr$filename)],
no = sample_name)) %>%
mutate(sample_name = sapply(strsplit(sample_name, "_rep"), function(l) l[1]))
## Organise fluorescence curves for target plate(s) e.g RSB plate 9
plate_ids <- unique(data_rsb$plate)[9]
a <- subset(data_rsb, plate == plate_ids)
my_list_rsb <- list()
for (i in unique(a$probe_amplicon)) {
print(i)
## subset amplicons on plate to analyse
b <- filter(a, probe_amplicon == i) %>% select(filename, cycle, fluorescence) %>%
spread(filename, fluorescence) %>% as.data.frame
## give short form sample name
sample_ids <- colnames(b)[2:ncol(b)]
sample_ids <- paste(data_rsb$sample_name[match(sample_ids, data_rsb$filename)],
i, sep = "_")
colnames(b)[2:ncol(b)] <- sample_ids
## QC plots plot(MFIaggr(x=b)) ## plot Mean Fluorescence Intensity by cycle
## per target per plate plotCurves(x=b[,1], y=b[,c(2:ncol(b))], type='l', CPP
## = TRUE) ## initial visual assessment with CPP QC
## keep all data in a list
my_list_rsb[[i]] <- b
}
## [1] "C-ORF1ab"
## [1] "HKU-ORF1b"
## [1] "nCov_IP2"
## [1] "COV19_SG2"
## [1] "C-N"
## [1] "HKU-N"
## [1] "nCov_IP4"
## [1] "GAPDH"
## [1] "E_Sarbeco"
## e.g JCSMR
plate_name = unique(data_jcsmr$plate_name)[1]
a <- subset(data_jcsmr, plate_name == plate_name)
my_list_jcsmr <- list()
for (i in unique(a$target_name)) {
print(i)
b <- filter(a, target_name == i) %>% select(filename, cycle, fluorescence) %>%
spread(filename, fluorescence) %>% as.data.frame
## give short form sample name
sample_ids <- colnames(b)[2:ncol(b)]
sample_ids <- paste(data_jcsmr$sample_name[match(sample_ids, data_jcsmr$filename)],
i, sep = "_")
colnames(b)[2:ncol(b)] <- sample_ids
## QC plots plot(MFIaggr(x=b)) ## plot Mean Fluorescence Intensity by cycle
## per target per plate plotCurves(x=b[,1], y=b[,c(2:ncol(b))], type='l', CPP
## = TRUE) ## initial visual assessment with CPP QC
my_list_jcsmr[[i]] <- b
}
## [1] "N gene"
## [1] "ORF1ab"
## [1] "Internal Control"
Standardize data and calculate Ct using threshold method.
par(mfrow=c(3,3))
CPP_list <- list()
th_list <- list()
for( i in names(my_list_rsb)){
a <- my_list_rsb[[i]]
### CPP functions to pre-process data (e.g. smooth, normalize, background correction)
res.CPP <- apply(a[, -1], MARGIN = 2, function(l) {CPP(a[, 1], l,
method="savgol", ## Savitzky-Golay smoothing
trans=T, ## background correction
method.reg="lmrob", ## robust linear reg
bg.range = c(1,20), ## set cycle range for background
bg.outliers = T, ## remove outliers in background
method.norm = "none" ## no normalization
)[["y.norm"]]})
## calculate Cqs using threshold method = 0.1
th.cyc.CPP <- apply(res.CPP, 2, function(l) {th.cyc(a[, 1], l, r = 0.1)[1, 1]})
matplot(res.CPP, type = "l", xlab = "Cycle", ylab = "Fluorescence", main = paste("CPP\n",i))
abline(h = 0.1, lty = 2)
}
par(mfrow=c(1,1))
The data from Vazyme kits are too noisy for reliable normalization (due to inability to diagnose aspecific ampification). Therefore, we cannot use the thresholding method in a consistent manner as above. Instead, we use the smoothed curves to interpolate first- and second-derivative maximums using the five-point stencil method.
We apply three filters to consider Ct values reliable:
par(mfrow=c(2,2))
cq <- NULL
for( i in names(my_list_jcsmr)){
a <- my_list_jcsmr[[i]]
### CPP functions to pre-process data (e.g. smooth, normalize, remove background, remove outliers)
res.CPP <- apply(a[, -1], MARGIN = 2, function(l) {CPP(a[, 1], l,
method="supsmu", ## Friedmann supersmoother
trans=T, ## background slope correction
method.reg="lmrob", ## robust linear reg
bg.range = c(1,22), ## set cycle range for background
method.norm = "none", ## no normalization
bg.outliers = T ### remove outliers in background
)[["y.norm"]]})
threshold = quantile(t(a[,-1]))[[1]]
hist(t(a[,-1]), breaks = 250, xlab = "Fluorescence", main = "Fluorescence distribution")
abline(v=threshold, col = "darkred")
## plot smoothed data
matplot(res.CPP, type = "l", xlab = "Cycle", ylab = "Fluorescence", main = paste("CPP\n",i))
## interpolate FDM and SDM from smoothed data and hold Cq values
for( h in 1:ncol(res.CPP)){
b <- clean_names(data.frame(cycle = a$cycle, res.CPP[, h]))
res <- inder(b, smooth.method = NULL)
summ <- summary(res, print = FALSE)
out <- as.data.frame(t(summ))
rownames(out) <- colnames(res.CPP)[h]
out$max <- max(res[,2])
out$threshold <- threshold
test <- ifelse(out$max < out$threshold, yes = "bad", no=
ifelse( out$FDM < 15 | out$SDM < 15, yes = "bad" , no =
ifelse( abs(out$SDC-out$FDM)>2, yes = "bad", no = "Good!")))
cq <- rbind(out, cq)
col <- rainbow(4)
## plot all obtained metrics
plot(res,
main=ifelse(test == "bad", yes = paste(colnames(res.CPP)[h], "removed!", sep='--'), no = colnames(res.CPP)[h]))
abline(v=summ, col = col)
text(summ["FDM"]-2, max(b$res_cpp_h)/1.2, paste0("FDM~", round(summ["FDM"], 2)), cex=0.8, col = col[1])
text(summ["SDM"]-2, max(b$res_cpp_h)/2, paste0("SDM~", round(summ["SDM"], 2)), cex=0.8, col = col[2])
text(summ["SDm"]-2, max(b$res_cpp_h)/3, paste0("SDm~", round(summ["SDm"], 2)), cex=0.8, col = col[3])
text(summ["SDC"]-2, max(b$res_cpp_h)/4, paste0("SDC~", round(summ["SDC"], 2)), cex=0.8, col = col[4])
abline(h=threshold, col = 'darkred')
}
}
input <- as.data.frame(cq) %>% rownames_to_column(var = 'sample') %>%
mutate(ct = ifelse(FDM<=15 | SDM<=15, yes = 40, no = SDM)) %>%
mutate(ct = ifelse(abs(SDC-FDM)<=2, yes = ct, no = 40)) %>%
mutate(ct = ifelse(max < threshold, yes = 40, no = ct)) %>%
mutate(sample = sapply(strsplit(sample, "\\."),function(l) l[1]))
lbls1 <- sapply(input$sample, function(l) { regexpr(l, pattern = "Internal Control") })
lbls1 <- lbls1[lbls1>1]
lbls2 <- sapply(input$sample, function(l) { regexpr(l, pattern = "N gene") })
lbls2 <- lbls2[lbls2>1]
lbls3 <- sapply(input$sample, function(l) { regexpr(l, pattern = "ORF1ab") })
lbls3 <- lbls3[lbls3>1]
lbls <- unlist(list(lbls1,lbls2,lbls3))
input <- input %>%
mutate(probe = substr(sample, start = lbls[match(sample, names(lbls))], stop = nchar(sample))) %>%
mutate(sample = substr(sample, start = 1, stop= lbls[match(sample, names(lbls))] -2))
par(mfrow=c(1,1))
ggplot(data=input, aes(x=sample, y=ct, colour=sample)) +
geom_jitter() + theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=7), legend.position = "none") +
scale_y_continuous(name = "Ct") +
scale_x_discrete(name = "Sample") +
facet_wrap(~probe, ncol = 1)